import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
import pandas as pd
import rasterio
from sklearn.model_selection import train_test_split
from rasterio.plot import show_hist, show
import fiona
from rasterio.tools.mask import mask
from pandas.util.testing import assert_frame_equal
from rasterio import Affine
import geopandas as gpd
pd.options.mode.use_inf_as_na = True
from osgeo import gdal
# Cargando dataset con información (loading the dataset with the information)
# Read the scene table from CSV, using the first CSV column as the index.
dataset = pd.read_csv(
    '/Users/oscar/Code/Maestria/perc_remota/taller_3/datasets/dos.csv',
    index_col=0,
)
# The elevation column is discarded and every missing value is replaced by 0.
dataset = dataset.drop(labels=['elevation'], axis=1)
dataset = dataset.fillna(0)
# Cargando polígonos de entrenamiento (loading the training polygons)
def _load_class_samples(class_id):
    """Load and clean the training samples of one class.

    Reads ``<class_id>.csv`` from the datasets directory with its first
    column as the index, drops the ``elevation`` column, replaces missing
    values with 0 and discards every row whose ``ndvi`` is 0.

    Parameters
    ----------
    class_id : int
        Number used as the CSV file name (``1`` -> ``1.csv``).

    Returns
    -------
    pandas.DataFrame
        The cleaned samples read from that file.
    """
    csv_path = '/Users/oscar/Code/Maestria/perc_remota/taller_3/datasets/{}.csv'.format(class_id)
    samples = pd.read_csv(csv_path, index_col=0).drop(['elevation'], axis=1).fillna(0)
    # Because missing values were just replaced by 0, this filter also removes
    # rows whose ndvi was missing in the file.
    return samples[samples['ndvi'] != 0]


# One cleaned sample table per training class (files 1.csv .. 7.csv).
class_1 = _load_class_samples(1)
class_2 = _load_class_samples(2)
class_3 = _load_class_samples(3)
class_4 = _load_class_samples(4)
class_5 = _load_class_samples(5)
class_6 = _load_class_samples(6)
class_7 = _load_class_samples(7)
# Muestreando datos de entrenamiento y datos de prueba (sampling training data and test data)
# Rows of the full table whose ndvi is 0 are labelled as class 0.
# The explicit .copy() makes the filtered frame independent of `dataset`, so
# adding the 'class' column is an unambiguous write (no SettingWithCopyWarning).
no_data_class = dataset[dataset['ndvi'] == 0].copy()
no_data_class['class'] = 0

# Fraction of every class that is held out for testing.
test_size = 0.5

# Each class is split separately so train and test both contain every class.
# No random_state is passed, so the split differs from run to run.
class_1_train, class_1_test = train_test_split(class_1, test_size=test_size)
class_2_train, class_2_test = train_test_split(class_2, test_size=test_size)
class_3_train, class_3_test = train_test_split(class_3, test_size=test_size)
class_4_train, class_4_test = train_test_split(class_4, test_size=test_size)
class_5_train, class_5_test = train_test_split(class_5, test_size=test_size)
class_6_train, class_6_test = train_test_split(class_6, test_size=test_size)
class_7_train, class_7_test = train_test_split(class_7, test_size=test_size)
no_data_train, no_data_test = train_test_split(no_data_class, test_size=test_size)
# Stack the per-class training halves into one table with a fresh 0..n-1 index.
train_frames = [
    class_1_train, class_2_train, class_3_train, class_4_train,
    class_5_train, class_6_train, class_7_train, no_data_train,
]
train_data = pd.concat(train_frames, ignore_index=True)

# Same for the held-out halves, in the same class order.
test_frames = [
    class_1_test, class_2_test, class_3_test, class_4_test,
    class_5_test, class_6_test, class_7_test, no_data_test,
]
test_data = pd.concat(test_frames, ignore_index=True)
# Separate the 'class' column (labels) from the remaining columns (features)
# and convert both to NumPy arrays. `.values` replaces the removed
# `DataFrame.as_matrix()` / `Series.as_matrix()` and returns the same array.
# Note: train_data / test_data are rebound from DataFrames to plain arrays.
train_labels = train_data['class'].values
train_data = train_data.drop(['class'], axis=1).values
test_labels = test_data['class'].values
test_data = test_data.drop(['class'], axis=1).values
# Fit a 500-tree random forest; oob_score=True makes the fitted model expose
# its out-of-bag accuracy estimate as `rf.oob_score_`.
rf = RandomForestClassifier(n_estimators=500, oob_score=True)
rf = rf.fit(train_data, train_labels)

# Accuracy on the held-out test samples (a different figure from the OOB one).
accuracy = rf.score(test_data, test_labels)

# Both scores are fractions in [0, 1]; scale them so the '%' sign is truthful.
print('Our OOB prediction of accuracy is: {oob}%'.format(oob=rf.oob_score_ * 100))
print('Accuracy on the held-out test set is: {acc}%'.format(acc=accuracy * 100))

# Confusion matrix of true vs. predicted labels, with row/column totals.
df = pd.DataFrame()
df['truth'] = test_labels
df['predict'] = rf.predict(test_data)
print(pd.crosstab(df['truth'], df['predict'], margins=True))
# Classify every row of the full table. `.values` replaces the removed
# `DataFrame.as_matrix()` and returns the same array.
# NOTE(review): assumes `dataset` holds exactly the feature columns, in the
# same order, that the model was trained on - confirm.
predict_data = dataset.values
clasification = rf.predict(predict_data)

# Fold the flat prediction vector back into a 2-D image; reshape raises
# ValueError if the number of rows in `dataset` is not 4397 * 5286.
shape_image = (4397, 5286)
img2 = clasification.reshape(shape_image[0], shape_image[1])

# Large, high-resolution figure for displaying the classified image.
plt.rcParams["figure.figsize"] = (50, 50)
plt.rcParams["figure.dpi"] = 150
show(img2)
# Reference raster whose georeferencing is copied onto the classified image.
infile = '/Users/oscar/Code/Maestria/perc_remota/taller_3/images/dos-reflectance.tif'
data = gdal.Open(infile)
# gdal.Open returns None instead of raising unless GDAL exceptions are enabled.
if data is None:
    raise IOError('Could not open reference raster: {}'.format(infile))
arr = img2

# First of all, gather some information from the original file
trans = data.GetGeoTransform()
proj = data.GetProjection()
print(trans)

# A 2-D NumPy array is shaped (rows, cols), while Driver.Create expects
# (xsize, ysize) = (cols, rows); name the dimensions for what they are.
n_rows, n_cols = arr.shape
outfile = '/Users/oscar/Code/Maestria/perc_remota/taller_3/images/classsification.tif'

# Create the file, using the information from the original file
outdriver = gdal.GetDriverByName("GTiff")
outdata = outdriver.Create(str(outfile), n_cols, n_rows, 1, gdal.GDT_Float32)
if outdata is None:
    raise IOError('Could not create output raster: {}'.format(outfile))

# Write the classified array into the single output band
outband = outdata.GetRasterBand(1)
outband.WriteArray(arr)
# Set a no data value if required
outband.SetNoDataValue(-1)
# Georeference the image
outdata.SetGeoTransform(trans)
# Write projection information
outdata.SetProjection(proj)

# GDAL only guarantees the file is complete on disk once the dataset has been
# flushed and released; dropping the references closes both datasets.
outband.FlushCache()
outdata.FlushCache()
outband = None
outdata = None
data = None